From: Joey Hess Date: Mon, 22 Sep 2025 18:56:50 +0000 (-0400) Subject: prevent deadlock when reconcileStaged runs restagePointerFiles X-Git-Tag: archive/raspbian/10.20251029-1+rpi1~1^2~3^2~78 X-Git-Url: https://dgit.raspbian.org/%22http://www.example.com/cgi/%22/%22http:/www.example.com/cgi/%22?a=commitdiff_plain;h=16d7432a2f6e085b6fec8ccce35bc41faea67745;p=git-annex.git prevent deadlock when reconcileStaged runs restagePointerFiles Fix hang that could occur when using git-annex adjust on a branch with a number of files greater than annex.queuesize. Or potentially other commands. When reconcileStaged is running, the database is being opened. But restagePointerFiles closes the database, and later writes to it. So it will deadlock if called by reconcileStaged. The deadlock occurred when the git queue happened to be full, causing adding a call to restagePointerFiles to it to flush the queue and restagePointerFiles to run at the wrong time. Fixed by making reconcileStaged, when it populates or depopulates a pointer file, arrange for restagePointerFiles to be run as a cleanup action, rather than from the git queue. But, what if restagePointerFiles is already in the git queue before reconcileStaged is run? If it adds anything else to the git queue, causing the queue to flush, it would still deadlock. To avoid this hypothetical situation, added a Annex.inreconcilestaged, and made restagePointerFiles check it and not do anything. Note that, I did consider the simpler approach of only running restagePointerFiles as a cleanup action, rather than from the git queue. But see commit 6a3bd283b8af53f810982e002e435c0d7c040c59 for why it was made to use the queue in the first place. I wanted to avoid tying this bug fix to a behavior change. Sponsored-by: mycroft --- diff --git a/Annex.hs b/Annex.hs index aba23587fb..421d152bf6 100644 --- a/Annex.hs +++ b/Annex.hs @@ -226,6 +226,7 @@ data AnnexState = AnnexState , cachedgitenv :: Maybe (AltIndexFile, OsPath, [(String, String)]) , urloptions :: Maybe UrlOptions , insmudgecleanfilter :: Bool + , inreconcilestaged :: Bool , getvectorclock :: IO CandidateVectorClock , proxyremote :: Maybe (Either ClusterUUID (Types.Remote.RemoteA Annex)) , reposizehandle :: Maybe RepoSizeHandle @@ -283,6 +284,7 @@ newAnnexState c r = do , cachedgitenv = Nothing , urloptions = Nothing , insmudgecleanfilter = False + , inreconcilestaged = False , getvectorclock = vc , proxyremote = Nothing , reposizehandle = Nothing diff --git a/Annex/Content.hs b/Annex/Content.hs index 638390b2bf..9162c34983 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -542,7 +542,7 @@ moveAnnex key src = ifM (checkSecureHashes' key) unless (null fs) $ do destic <- withTSDelta $ liftIO . genInodeCache dest - ics <- mapM (populatePointerFile (Restage True) key dest) fs + ics <- mapM (populatePointerFile QueueRestage key dest) fs Database.Keys.addInodeCaches key (catMaybes (destic:ics)) ) @@ -784,7 +784,7 @@ removeAnnex (ContentRemovalLock key) = withObjectLoc key $ \file -> resetpointer file = unlessM (liftIO $ isSymbolicLink <$> R.getSymbolicLinkStatus (fromOsPath file)) $ ifM (isUnmodified key file) ( adjustedBranchRefresh $ - depopulatePointerFile key file + depopulatePointerFile QueueRestage key file -- Modified file, so leave it alone. -- If it was a hard link to the annex object, -- that object might have been frozen as part of the diff --git a/Annex/Content/PointerFile.hs b/Annex/Content/PointerFile.hs index 22657a11c8..4054296c3b 100644 --- a/Annex/Content/PointerFile.hs +++ b/Annex/Content/PointerFile.hs @@ -52,8 +52,8 @@ populatePointerFile restage k obj f = go =<< liftIO (isPointerFile f) {- Removes the content from a pointer file, replacing it with a pointer. - - Does not check if the pointer file is modified. -} -depopulatePointerFile :: Key -> OsPath -> Annex () -depopulatePointerFile key file = do +depopulatePointerFile :: Restage -> Key -> OsPath -> Annex () +depopulatePointerFile restage key file = do st <- liftIO $ catchMaybeIO $ R.getFileStatus (fromOsPath file) let mode = fmap fileMode st secureErase file @@ -68,4 +68,4 @@ depopulatePointerFile key file = do (fmap Posix.modificationTimeHiRes st) #endif withTSDelta (liftIO . genInodeCache tmp) - maybe noop (restagePointerFile (Restage True) file) ic + maybe noop (restagePointerFile restage file) ic diff --git a/Annex/Ingest.hs b/Annex/Ingest.hs index 07b5dad282..84e6cadddf 100644 --- a/Annex/Ingest.hs +++ b/Annex/Ingest.hs @@ -172,7 +172,7 @@ ingestAdd' meterupdate ld@(Just (LockedDown cfg source)) mk = do {- Ingests a locked down file into the annex. Does not update the working - tree or the index. -} ingest :: MeterUpdate -> Maybe LockedDown -> Maybe Key -> Annex (Maybe Key, Maybe InodeCache) -ingest meterupdate ld mk = ingest' Nothing meterupdate ld mk (Restage True) +ingest meterupdate ld mk = ingest' Nothing meterupdate ld mk QueueRestage ingest' :: Maybe Backend -> MeterUpdate -> Maybe LockedDown -> Maybe Key -> Restage -> Annex (Maybe Key, Maybe InodeCache) ingest' _ _ Nothing _ _ = return (Nothing, Nothing) @@ -228,7 +228,7 @@ ingest' preferredbackend meterupdate (Just (LockedDown cfg source)) mk restage = finishIngestUnlocked :: Key -> KeySource -> Annex () finishIngestUnlocked key source = do cleanCruft source - finishIngestUnlocked' key source (Restage True) Nothing + finishIngestUnlocked' key source QueueRestage Nothing finishIngestUnlocked' :: Key -> KeySource -> Restage -> Maybe LinkAnnexResult -> Annex () finishIngestUnlocked' key source restage lar = do diff --git a/Annex/Link.hs b/Annex/Link.hs index 480a00ce25..b3c962a772 100644 --- a/Annex/Link.hs +++ b/Annex/Link.hs @@ -7,7 +7,7 @@ - - Pointer files are used instead of symlinks for unlocked files. - - - Copyright 2013-2022 Joey Hess + - Copyright 2013-2025 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -32,6 +32,7 @@ import Git.Config import Annex.HashObject import Annex.InodeSentinal import Annex.PidLock +import Types.CleanupActions import Utility.FileMode import Utility.InodeCache import Utility.Tmp.Dir @@ -160,7 +161,10 @@ writePointerFile file k mode = do F.writeFile' file (formatPointer k) maybe noop (R.setFileMode (fromOsPath file)) mode -newtype Restage = Restage Bool +data Restage + = NoRestage + | QueueRestage + | LaterRestage {- Restage pointer file. This is used after updating a worktree file - when content is added/removed, to prevent git status from showing @@ -184,26 +188,27 @@ newtype Restage = Restage Bool - and will store it in the restage log. Displays a message to help the - user understand why the file will appear to be modified. - - - This uses the git queue, so the update is not performed immediately, - - and this can be run multiple times cheaply. Using the git queue also - - prevents building up too large a number of updates when many files - - are being processed. It's also recorded in the restage log so that, - - if the process is interrupted before the git queue is fulushed, the - - restage will be taken care of later. + - The update is not performed immediately, so and this can be run multiple + - times cheaply. It's also recorded in the restage log so that, if the + - process is interrupted before the git queue is fulushed, the restage + - will be taken care of later. -} restagePointerFile :: Restage -> OsPath -> InodeCache -> Annex () -restagePointerFile (Restage False) f orig = do +restagePointerFile NoRestage f orig = do flip writeRestageLog orig =<< inRepo (toTopFilePath f) toplevelWarning True $ unableToRestage $ Just f -restagePointerFile (Restage True) f orig = do +{- Using the git queue prevents building up too large a number of updates + - when many files are being processed. -} +restagePointerFile QueueRestage f orig = do flip writeRestageLog orig =<< inRepo (toTopFilePath f) - -- Avoid refreshing the index if run by the - -- smudge clean filter, because git uses that when - -- it's already refreshing the index, probably because - -- this very action is running. Running it again would likely - -- deadlock. unlessM (Annex.getState Annex.insmudgecleanfilter) $ Annex.Queue.addFlushAction restagePointerFileRunner [f] +{- Defer the restage until the end. -} +restagePointerFile LaterRestage f orig = do + flip writeRestageLog orig =<< inRepo (toTopFilePath f) + unlessM (Annex.getState Annex.insmudgecleanfilter) $ + Annex.addCleanupAction RestagePointerFiles $ + restagePointerFiles =<< Annex.gitRepo restagePointerFileRunner :: Git.Queue.FlushActionRunner Annex restagePointerFileRunner = @@ -219,7 +224,7 @@ restagePointerFileRunner = -- to bypass the lock. Then replace the old index file with the new -- updated index file. restagePointerFiles :: Git.Repo -> Annex () -restagePointerFiles r = unlessM (Annex.getState Annex.insmudgecleanfilter) $ do +restagePointerFiles r = checkcanrun $ do -- Flush any queued changes to the keys database, so they -- are visible to child processes. -- The database is closed because that may improve behavior @@ -330,6 +335,9 @@ restagePointerFiles r = unlessM (Annex.getState Annex.insmudgecleanfilter) $ do ck = ConfigKey "filter.annex.process" ckd = ConfigKey "filter.annex.process-temp-disabled" + checkcanrun a = unlessM (Annex.getState Annex.insmudgecleanfilter) $ + unlessM (Annex.getState Annex.inreconcilestaged) $ a + unableToRestage :: Maybe OsPath -> StringContainingQuotedPath unableToRestage mf = "git status will show " <> maybe "some files" QuotedPath mf diff --git a/CHANGELOG b/CHANGELOG index 530a560e34..821a554794 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -21,6 +21,8 @@ git-annex (10.20250829) UNRELEASED; urgency=medium * Add git-remote-p2p-annex and git-remote-tor-annex to standalone builds. * enableremote: Disallow using type= to attempt to change the type of an existing remote. + * Fix hang that could occur when using git-annex adjust on a branch with + a number of files greater than annex.queuesize. -- Joey Hess Fri, 29 Aug 2025 12:34:06 -0400 diff --git a/Command/PreCommit.hs b/Command/PreCommit.hs index a58bfc6a70..8404cd6665 100644 --- a/Command/PreCommit.hs +++ b/Command/PreCommit.hs @@ -42,7 +42,7 @@ seek ps = do =<< isAnnexLink f -- after a merge conflict or git cherry-pick or stash, pointer -- files in the worktree won't be populated, so populate them here - Command.Smudge.updateSmudged (Restage False) + Command.Smudge.updateSmudged NoRestage runAnnexHook preCommitAnnexHook annexPreCommitCommand diff --git a/Command/Smudge.hs b/Command/Smudge.hs index c381c0aa5a..22d991d703 100644 --- a/Command/Smudge.hs +++ b/Command/Smudge.hs @@ -173,7 +173,7 @@ clean' file mk passthrough discardreststdin emitpointer = doingest preferredbackend = do -- Can't restage associated files because git add -- runs this and has the index locked. - let norestage = Restage False + let norestage = NoRestage emitpointer =<< postingest =<< (\ld -> ingest' preferredbackend nullMeterUpdate ld Nothing norestage) @@ -294,7 +294,7 @@ getMoveRaceRecovery k file = void $ tryNonAsync $ obj <- calcRepo (gitAnnexLocation k) -- Cannot restage because git add is running and has -- the index locked. - populatePointerFile (Restage False) k obj file >>= \case + populatePointerFile NoRestage k obj file >>= \case Nothing -> return () Just ic -> Database.Keys.addInodeCaches k [ic] @@ -305,7 +305,7 @@ update = do -- Doing it explicitly here avoids a later pause in the middle of -- some other action. scanAnnexedFiles - updateSmudged (Restage True) + updateSmudged LaterRestage stop updateSmudged :: Restage -> Annex () diff --git a/Command/Unlock.hs b/Command/Unlock.hs index ac8520f0f4..9b4c1beff4 100644 --- a/Command/Unlock.hs +++ b/Command/Unlock.hs @@ -67,6 +67,6 @@ perform dest key = do cleanup :: OsPath -> Maybe InodeCache -> Key -> Maybe FileMode -> CommandCleanup cleanup dest destic key destmode = do stagePointerFile dest destmode =<< hashPointerFile key - maybe noop (restagePointerFile (Restage True) dest) destic + maybe noop (restagePointerFile QueueRestage dest) destic Database.Keys.addAssociatedFile key =<< inRepo (toTopFilePath dest) return True diff --git a/Database/Keys.hs b/Database/Keys.hs index f17d07dbe7..81ac1b881b 100644 --- a/Database/Keys.hs +++ b/Database/Keys.hs @@ -267,7 +267,7 @@ reconcileStaged _ _ (DbWasOpen True) = return (DbTablesChanged False False) reconcileStaged dbisnew qh _ = ifM isBareRepo ( return mempty - , go =<< getindextree + , inReconcileStaged $ go =<< getindextree ) where lastindexref = Ref "refs/annex/last-index" @@ -433,13 +433,19 @@ reconcileStaged dbisnew qh _ = ifM isBareRepo filepopulated <- sameInodeCache p ics case (keypopulated, filepopulated) of (True, False) -> - populatePointerFile (Restage True) key obj p >>= \case + populatePointerFile restage key obj p >>= \case Nothing -> return () Just ic -> addinodecaches key (catMaybes [Just ic, mobjic]) - (False, True) -> depopulatePointerFile key p + (False, True) -> depopulatePointerFile restage key p _ -> return () + -- Cannot use QueueRestage here, because it could deadlock; + -- restagePointerFiles tries to close the database handle, + -- but the database handle is open while reconcileStaged + -- is running. + restage = LaterRestage + send :: ((Maybe Key -> Annex a, Ref) -> IO ()) -> Ref -> (Maybe Key -> Annex a) -> IO () send feeder r withk = feeder (withk, r) @@ -500,6 +506,15 @@ reconcileStaged dbisnew qh _ = ifM isBareRepo | dbisnew = SQL.newAssociatedFile | otherwise = SQL.addAssociatedFile +-- Avoid a potential deadlock. +inReconcileStaged :: Annex a -> Annex a +inReconcileStaged = bracket setup cleanup . const + where + setup = Annex.changeState $ \s -> s + { Annex.inreconcilestaged = True } + cleanup () = Annex.changeState $ \s -> s + { Annex.inreconcilestaged = False } + {- Normally the keys database is updated incrementally when opened, - by reconcileStaged. Calling this explicitly allows running the - update at an earlier point. diff --git a/Types/CleanupActions.hs b/Types/CleanupActions.hs index a15917fc1e..a6a5b4ee7d 100644 --- a/Types/CleanupActions.hs +++ b/Types/CleanupActions.hs @@ -20,6 +20,7 @@ data CleanupAction | AdjustedBranchUpdate | TorrentCleanup URLString | OtherTmpCleanup + | RestagePointerFiles deriving (Eq, Ord) data SignalAction diff --git a/doc/bugs/Unlock_filter_seems_to_deadlock_for_huge_worktree.mdwn b/doc/bugs/Unlock_filter_seems_to_deadlock_for_huge_worktree.mdwn index b4253cb281..549aa99b64 100644 --- a/doc/bugs/Unlock_filter_seems_to_deadlock_for_huge_worktree.mdwn +++ b/doc/bugs/Unlock_filter_seems_to_deadlock_for_huge_worktree.mdwn @@ -82,3 +82,4 @@ Repeatedly calling this(and ctrl-c it when it inevitably get stuck) seems to eve ### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders) I really like git-annex, it allowed me to deduplicate the files in the big repository described above without much issues except for this bug. +> [[fixed|done]] --[[Joey]] diff --git a/doc/bugs/Unlock_filter_seems_to_deadlock_for_huge_worktree/comment_3_6d01fe7c2a17669b2b9927e157d7d27f._comment b/doc/bugs/Unlock_filter_seems_to_deadlock_for_huge_worktree/comment_3_6d01fe7c2a17669b2b9927e157d7d27f._comment new file mode 100644 index 0000000000..38644d900e --- /dev/null +++ b/doc/bugs/Unlock_filter_seems_to_deadlock_for_huge_worktree/comment_3_6d01fe7c2a17669b2b9927e157d7d27f._comment @@ -0,0 +1,25 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2025-09-22T17:04:39Z" + content=""" +> It may be that any time the git queue gets flushed with restagePointerFileRunner +> in the queue it hangs like this. This needs further investigation. + +The deadlock occurs when restagePointerFiles calls closeDbHandle, which +blocks due to opening the db handle having called reconcileStaged, which is +still running. + +reconcileStaged itself calls populatePointerFile repeatedly. Which calls +restagePointerFile, which adds restagePointerFileRunner to the git queue. + +If adding that happens to make the git queue full, then it will flush, +which causes restagePointerFiles to run. + +Fixing this seems to need a way to avoid restagePointerFiles from being +run by anything reconcileStaged does. + +All that `git-annex smudge --update` is doing to trigger this is +calling populatePointerFile, which calls restagePointerFile. Other commands +that call that in the same situation would probably also deadlock. +"""]]